/*
Do-file name: 5a_PrepareDataAKM.do
Author: Thiago
Date: Nov 7th 2018
This version: April 16th 2019
Comments: This do-file treats the court data before use for descriptives and AKM estimates. 
It saves an intermediary file with clean court info.
*/
clear all

*Raw dataset currently being used includes RAIS info
use "$data_input/identified/JusticaAberta_data_raw.dta"


*Cleaning duplicate Judge names
replace Judge = "VANDIMARA GALVAO RAMOS PAIVA ZANOLO" if Judge_idP == 11729
replace Judge_idP = 11730 if Judge_idP == 11729
replace Judge = "GLORIA HELOIZA LIMA SILVA MARTINS CASARIN" if Judge_idP == 4945
replace Judge_idP = 4944 if Judge_idP == 4945
replace Judge = "GLEIDSON OLIVEIRA GRISOSTE BARBOSA" if Judge_idP == 4933
replace Judge_idP = 4934 if Judge_idP == 4933
replace Judge = "TULIO DUAILIBI ALVES SOUZA" if Judge_idP == 11618
replace Judge_idP = 11617 if Judge_idP == 11618
replace Judge = "ANA PAULA VEIGA CARLOTA MIRANDA" if Judge_idP == 888
replace Judge_idP = 889 if Judge_idP == 888
replace Judge = "MILENE GLORIA PINTO VASSAL" if Judge_idP == 8944
replace Judge_idP = 8945 if Judge_idP == 8944
replace Judge = "JOAO THIAGO FRANCA GUERRA" if Judge_idP == 5907
replace Judge_idP = 5906 if Judge_idP == 5907
replace Judge = "GERARDO HUMBERTO ALVES SILVA JUNIOR" if Judge_idP == 4725
replace Judge_idP = 4724 if Judge_idP == 4725
replace Judge = "WLADIS ROBERTO FREIRE AMARAL" if Judge_idP == 12103
replace Judge_idP = 12104 if Judge_idP == 12103
replace Judge = "PAULA SAIDE BIAGI MESSEN MUSSI CASAGRANDE" if Judge_idP == 9513
replace Judge_idP = 9514 if Judge_idP == 9513
replace Judge = "ANTONIA SIQUEIRA GONCALVES RODRIGUES" if Judge_idP == 1210
replace Judge_idP = 1211 if Judge_idP == 1210
replace Judge = "RHAMICE IBRAHIM ALI AHMAD ABDALAH" if Judge_idP == 10230
replace Judge_idP = 10231 if Judge_idP == 10230
replace Judge = "LUCIA REGINA ESTEVES MAGALHAES" if Judge_idP == 7100
replace Judge_idP = 7099 if Judge_idP == 7100
replace Judge = "MARIA EROTIDES CNEIP BARANJAC" if Judge_idP == 8338
replace Judge_idP = 8337 if Judge_idP == 8338
replace Judge = "OLINDA QUADROS ALTOMARE CASTRILON" if Judge_idP == 9311
replace Judge_idP = 9312 if Judge_idP == 9311
replace Judge = "JEAN ALBERT SOUZA SAADI" if Judge_idP == 5666
replace Judge_idP = 5667 if Judge_idP == 5666
replace Judge = "GERALDO FERNANDES FIDELIS NETO" if Judge_idP == 4707
replace Judge_idP = 4708 if Judge_idP == 4707
replace Judge = "MARIA APARECIDA COSTA BASTOS" if Judge_idP == 8228
replace Judge_idP = 8227 if Judge_idP == 8228
replace Judge = "MARCELO SEBASTIAO PRADO MORAS" if Judge_idP == 7831
replace Judge_idP = 7832 if Judge_idP == 7831
replace Judge = "VIVIANE ALONSO ALCMIM" if Judge_idP == 11899
replace Judge_idP = 11900 if Judge_idP == 11899
replace Judge = "WENDEL CARIELI GUEDES SIMPLICIO" if Judge_idP == 12039
replace Judge_idP = 12040 if Judge_idP == 12039
replace Judge = "JOAO FRANCISCO CAMPOS ALMEIDA" if Judge_idP == 5816
replace Judge_idP = 5817 if Judge_idP == 5816
replace Judge = "LEONARDO CAMPOS COSTA E SILVA PITALUGA" if Judge_idP == 6818
replace Judge_idP = 6819 if Judge_idP == 6818
replace Judge = "EDUARDO CALMON ALMEIDA CEZAR" if Judge_idP == 3386
replace Judge_idP = 3387 if Judge_idP == 3386
replace Judge = "MARCOS TERENCIO AGOSTINHO PIRES" if Judge_idP == 8142
replace Judge_idP = 8143 if Judge_idP == 8142


*Cleaning name of courts
gen Court_name_clean = upper(Court_name)
replace Court_name_clean=subinstr(Court_name_clean,"Á", "A", .)
replace Court_name_clean=subinstr(Court_name_clean,"Â", "A", .)
replace Court_name_clean=subinstr(Court_name_clean,"„", "A", .)
replace Court_name_clean=subinstr(Court_name_clean,"·", "A", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ù", "U", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ú", "U", .)
replace Court_name_clean=subinstr(Court_name_clean,"Í", "I", .)
replace Court_name_clean=subinstr(Court_name_clean,"Û", "U", .)
replace Court_name_clean=subinstr(Court_name_clean,"È", "E", .)
replace Court_name_clean=subinstr(Court_name_clean,"É", "E", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ê", "E", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ô", "O", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ó", "O", .)
replace Court_name_clean=subinstr(Court_name_clean,"‚", "A", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ì", "I", .)
replace Court_name_clean=subinstr(Court_name_clean,"˙", "U", .)
replace Court_name_clean=subinstr(Court_name_clean,"ı", "O", .)
replace Court_name_clean=subinstr(Court_name_clean,"Ç", "C", .)

*Manually correcting small mistakes in court assignment
replace court_level = 3 if court_level == 5 & State == "CE"					//Fortaleza used to be special level, but changed to 3rd level in 2009 (https://www.tjce.jus.br/noticias/comarcas-do-ceara-ja-tem-nova-organizacao-judiciaria/)
replace court_level = 1 if Municipality == "TERENOS"					    //One Municipality in MS had court_level==0, change to Entrancia Inicial since Vara Unica
replace court_level = 3 if State == "SE" & court_level == 2

lab def court_level 1"Initial" 2"Intermediate" 3"Final" 4"Unique" 5"Special"
lab val court_level court_level


/******* IMPORTANT - DROPPING OBSERVATIONS **********
All courts named SJBA, in the state of BA, are actual federal courts. I checked the names of the judges working 
on them and they are all Federal, not State, Judges. 
In several other states in the NE, the same is true for courts with names such as "10a vara", "11a vara", etc...
So I'll drop all observations from those courts.
*/

gen federal = (regexm(Court_name_clean, "SJ"))
replace federal = 1 if (inlist(Court_name_clean, "10ª VARA","11ª VARA","12ª VARA","13ª VARA","14ª VARA","15ª VARA","16ª VARA","17ª VARA","18ª VARA"))
replace federal = 1 if (inlist(Court_name_clean, "19ª VARA", "20ª VARA","21ª VARA","22ª VARA", "23ª VARA","24ª VARA", "25ª VARA","26ª VARA", "27ª VARA"))
replace federal = 1 if (inlist(Court_name_clean, "28ª VARA", "29ª VARA","30ª VARA", "31ª VARA", "32ª VARA", "33ª VARA", "34ª VARA", "35ª VARA"))
replace federal = 1 if (inlist(Court_name_clean, "36ª VARA", "37ª VARA") )

gen unique_court = Court_name_clean + "_" + State

drop if federal == 1


/*Now I collapse the dataset to the court level (using unique identifiers), create the distinction of type courts
and produce some descriptive stats at the court level. I then merge the court-level data back to the CNJ dataset.
*/
preserve
	 collapse (first) Municipality State Court_name Court_name_clean court_level, by(Court_idP)  //9,699 unique courts
	 
	 gen type_unica = (regexm(Court_name_clean, "UNIC") | regexm(Court_name_clean, "PLENA") | regexm(Court_name_clean, "úNICA") | regexm(Court_name_clean, "VARA JUDICIAL")) | (regexm(Court_name_clean, "VINCULAD"))
	 gen type_criminal = (regexm(Court_name_clean, "CRIMINA") | regexm(Court_name_clean, "PENA") | regexm(Court_name_clean, "CRIM.") | regexm(Court_name_clean, "ENTORPEC")| regexm(Court_name_clean, "TOXIC")| regexm(Court_name_clean, "JURI")| regexm(Court_name_clean, "JúRI"))
	 gen type_civel = (regexm(Court_name_clean, "CIVEL") | regexm(Court_name_clean, "CíVEL") | regexm(Court_name_clean, "CIVIL"))
	 gen type_familia = (regexm(Court_name_clean, "FAMILIA") | regexm(Court_name_clean, "INFANCIA") | regexm(Court_name_clean, "FAMíLIA") | regexm(Court_name_clean, "INFâNCIA") | regexm(Court_name_clean, "INF.")| regexm(Court_name_clean, "MENORES")| regexm(Court_name_clean, "SUCESS") | regexm(Court_name_clean, "ADOLE") | regexm(Court_name_clean, "MULHER"))
	 gen type_fazenda = (regexm(Court_name_clean, "FAZ") | regexm(Court_name_clean, "BANCAR") | regexm(Court_name_clean, "FISC") )
	 gen type_empresa = (regexm(Court_name_clean, "COMERC") | regexm(Court_name_clean, "EMPRESAR") | regexm(Court_name_clean, "REGISTR") | regexm(Court_name_clean, "FALENC")| regexm(Court_name_clean, "PRECAT")| regexm(Court_name_clean, "FAL.")| regexm(Court_name_clean, "TRIBUT") | regexm(Court_name_clean, "TITULOS")| regexm(Court_name_clean, "TíTULOS"))
	 gen type_JE = (regexm(Court_name_clean, "ESPECIA") | regexm(Court_name_clean, "JEC") | regexm(Court_name_clean, "JUIZADO") | regexm(Court_name_clean, "JUIZ. ESP."))
	 gen type_outro = (regexm(Court_name_clean, "MILITAR") |  regexm(Court_name_clean, "RECURSO") | regexm(Court_name_clean, "TRABALHO") |  regexm(Court_name_clean, "CONCILIA") |  regexm(Court_name_clean, "TRANSITO") |  regexm(Court_name_clean, "AGRARI") |  regexm(Court_name_clean, "FUNDIARIO"))
	 
	 gegen tot = rowtotal(type_criminal type_civel type_familia type_JE type_unica type_fazenda type_empresa type_outro)
	 
	 gen type_court = .
	 replace type_court = 1 if type_unica == 1
	 replace type_court = 2 if type_criminal == 1
	 replace type_court = 3 if type_civel == 1
	 replace type_court = 4 if type_familia == 1
	 replace type_court = 5 if type_fazenda == 1
	 replace type_court = 6 if type_empresa == 1
	 replace type_court = 7 if type_JE == 1
	 replace type_court = 8 if type_outro == 1
	 
	 replace type_court = 1 if (Court_name_clean == "VARA JURISDICÃO PLENA" | Court_name_clean == "VARA JURISDICAO PLENA" | Court_name_clean == "VARA JURISDICÃO UNICA")

	 *Change specific courts manually
	 replace  type_court = 1 if State == "AM" & type_court == .
	 replace  type_court = . if State == "AM" & type_court == 1 & (regexm(Court_name_clean, "1") | regexm(Court_name_clean, "2") | regexm(Court_name_clean, "3") | regexm(Court_name_clean, "4") | regexm(Court_name_clean, "5"))
	 replace  type_court = 1 if State == "SE" & type_court == .						// all initial level, assign  unique courts
	 replace  type_court = 8 if State == "BA" & type_court == .  & regexm(Court_name_clean, "SAJ")
	 replace  type_court = 8 if State == "BA" & inlist(Court_name_clean, "1º VARA DE RELACÕES DE CONSUMO"," NúCLEO DE PRISãO EM FLAGRANTE")
						
	 replace  type_court = 1 if type_court == . & court_level == 1 									//As a rule Initial courts are of general interest, not specialized
	 
	 *Forcing all 397 courts to be "other"; can't find information for most of them so unclear whether they are general courts
	 replace type_court = 8 if type_court == .
	 
	 lab def type_court 1"General Court" 2"Criminal Court" 3"Civil Court" 4"Family Court" 5"Public Court" 6"Comercial Court" 7"Small-stakes court" 8"Other courts"
	 lab val  type_court type_court
	 lab val court_level court_level
	 drop type_unica type_criminal type_civel type_familia type_fazenda type_empresa type_JE type_outro
	 
	 
	 tabout type_court  using "$out/tab_type.tex", replace style(tex) f(0)
	 tabout type_court court_level using "$out/tabulate_court.tex", replace style(tex) cell(row) 
	 tabout State court_level using "$out/tabulate_state.tex", replace style(tex) cell(row) 
	 /* Checks on court level
	  - ES, SC have entrancia especial, assign level == 5 for those courts
	  - DF has no entrancia system, assign level == 4 for all DF courts
	  - AM,AP,SC do not have "entrancia intermediaria (no courts with level == 2)
	  - SE does not have entrancia 
	 */

	 tempfile vara
	 save "`vara'"

restore

merge m:1 Court_idP using "`vara'"
assert _merge == 3
drop _merge

lab val type_court type_court

recode type_court (5 6 8 = 6) (7 = 5), gen(type_court_short)
tab type_court_short, gen(type_court_short_)

tab court_level, gen(court_level_)

*Merging with details from judicial districts (Comarcas)
merge m:1 Munic_code3 using "$data_input/Malha_Estadual_2012_original"
drop if _merge == 2
drop _merge

gen str name = upper(word(Judge, 1))

*Assigns gender to individual first names using Census name classification
preserve
	 import delimited "$data_input/nomes.csv", clear 
	 rename first_name name
	 replace name = upper(name)
	 keep name classification
	 tempfile temp
	 save "`temp'"
restore

merge m:1 name using "`temp'" // Only 19 individuals are not matched to a gender by first name
drop if _merge  == 2
drop _merge

rename classification gender


*** PROMOTION PATTERNS ****
sort Judge_idP YM Court_idP
by Judge_idP: gen promotion_unique = promotion_max if [_n]==1
by Judge_idP: gen instance_first = court_level if _n == 1
lab val instance_first court_level

gen promotion_month = YM if promotion == 1
bys Judge_idP promotion_month: gen promotion_total = 1 if [_n]==1
replace promotion_total = . if promotion_month == .

sort Judge_idP
gegen promotion_number = total(promotion_total), by(Judge_idP)
bys Judge_idP: gen promotion_times = promotion_number if [_n]==1

sort Judge_idP YM
gegen higher_month = max(court_level), by(Judge_idP YM)
replace higher_month = . if (promotion!=1 | higher_month!=court_level)
bys Judge_idP YM: gen higher_month_unique = higher_month if [_n]==1


**Keeping only variables of interest
global vars_keep "Judge_idP Judge State_cod State  Court_idP Court_name_clean YM  Munic_code3 Munic_code2 Municipality Sent_Trial_Merits_J-Declared_Impediments_SuspicionsJ Judge_Court_idP promotion-promotion_alt_max federal-higher_month_unique Process_Await_Compliance_C Process_Await_Progress_C Allocated_C Process_Ongoing_C Court_Appeal_C"
keep  $vars_keep

encode Court_name_clean, gen(Court_name)
encode Municipality, gen(Municipality_enc)
encode unique_court, gen(unique_court_enc)
*encode comarca, gen(comarca_enc)
*encode Judge, gen(Judge_name)

drop Munic name Court_name_clean Municipality unique_court 

compress *

sort Judge_idP YM Court_idP
order Judge_idP Judge State_cod State  Court_idP Court_name YM  Munic_code3 Munic_code2 Sent_Trial_Merits_J 

save "$temp/CnjDataset_clean.dta", replace
